#####################
# Sleep Project - Pedro Bessone, Gautam Rao, Heather Schofield, Frank Schilbach, and Mattie Toma
# Purpose: Runs the step-down procedure for adjusted p-values for table 4. Takes in the unadjusted
#          p-values from Table_4.do and the RI p-values from Table_4_RI.do and exports the adjusted
#          p-values.
# Last edited: 07 May 2021
#####################

##Set-up
rm(list = ls())
options(scipen = 99)
require(rio)
require(tidyverse)

# Load function that performs step-down adjustment 
source("Scripts/adjust_pval_step_down_fun.R")

# Load actual p-values
df.p = import("Datasets/pvals_unadjusted.csv") %>% 
  as_tibble()
colnames(df.p) = str_remove(colnames(df.p), pattern = "1")

# Divide them into the families that we want to correct p-values
df.family = df.p %>% 
  filter( var_name %in%  c("earnings", "wellbeing", "cogindex", "pref") )

df.work = df.p %>% 
  filter( var_name %in%  c("prod", "typing", "output"))

df.wb = df.p %>% 
  filter( var_name %in%  c("physical", "psych"))

df.cog = df.p %>% 
  filter( var_name %in%  c("cogfunction", "attention"))

df.pref = df.p %>% 
  filter( var_name %in%  c("timepref", "social", "risk"))

# Load RI p-vals/t-stats
df.ri.pvals.raw = import("Datasets/mht_ri_pvals_table_pool.dta") %>% as_tibble()

# Keep only pvals and t-stats
df.ri.pvals = df.ri.pvals.raw %>% 
  select(contains(c("tstat_", "p_"))) %>% 
  select(!contains(c("break", "work")))

# Transform t-stats into p-vals
t_to_pval = function(t){
  pval = 2*pnorm(-abs(t))
}

df.ri.pvals = df.ri.pvals %>% 
  mutate(across(.cols = contains("tstat_"), .fns = t_to_pval))

# Rename t_stats to p_vals 
colnames(df.ri.pvals)[str_detect(colnames(df.ri.pvals), "tstat_")] = 
  str_replace(colnames(df.ri.pvals)[str_detect(colnames(df.ri.pvals), "tstat_")], pattern = "tstat_", replacement = "p_")

# Rename phys to physical so it has the same name for original and RI p-vals
colnames(df.ri.pvals)[str_detect(colnames(df.ri.pvals), "phys")] = 
  str_replace(colnames(df.ri.pvals)[str_detect(colnames(df.ri.pvals), "phys")], pattern = "phys", replacement = "physical")

# Rename cog_ to cogfunction_ so it has the same name for original and RI p-vals
colnames(df.ri.pvals)[str_detect(colnames(df.ri.pvals), "cog_")] = 
  str_replace(colnames(df.ri.pvals)[str_detect(colnames(df.ri.pvals), "cog_")], pattern = "cog_", replacement = "cogfunction_")

# Rename salience to attention so it has the same name for original and RI p-vals
colnames(df.ri.pvals)[str_detect(colnames(df.ri.pvals), "salience")] = 
  str_replace(colnames(df.ri.pvals)[str_detect(colnames(df.ri.pvals), "salience")], pattern = "salience", replacement = "attention")

# Rename time to timepref so it has the same name for original and RI p-vals
colnames(df.ri.pvals)[str_detect(colnames(df.ri.pvals), "time")] = 
  str_replace(colnames(df.ri.pvals)[str_detect(colnames(df.ri.pvals), "time")], pattern = "time", replacement = "timepref")

# Divide them into the families that we want to correct p-values
df.family.RI = df.ri.pvals %>% 
  select(contains(c("p_earnings", "p_wellbeing", "p_cogindex", "p_pref")))

df.work.RI = df.ri.pvals %>% 
  select(contains(c("prod", "typing", "output")))

df.wb.RI = df.ri.pvals %>% 
  select(contains(c("physical", "psych")))

df.cog.RI = df.ri.pvals %>% 
  select(contains(c("cogfunction", "attention")))

df.pref.RI = df.ri.pvals %>% 
  select(contains(c("timepref", "social", "risk")))

# Function to get p-vals for different comparisons
get_adjust_pvals = function(df.original, df.RI){
  ### Correct family-wise outcomes ####
  # Night-sleep vs. Control
  p_val_original = df.original$p_val_ns
  p_val_RI_mat   = df.RI %>% select(!contains("nap")) %>% as.matrix
  
  p_val_mht_ns_cont = adjust_pval_step_down_fun(p_val_original, p_val_RI_mat)
  
  # Nap vs. Control
  p_val_original = df.original$p_val_nap
  p_val_RI_mat   = df.RI %>% select(!contains("ns")) %>% as.matrix
  
  p_val_mht_nap_cont = adjust_pval_step_down_fun(p_val_original, p_val_RI_mat)
  
  # Nap vs. Night-Sleep
  p_val_original = df.original$p_val_d_nap_ns
  p_val_RI_mat   = df.RI %>% select(contains("_d_")) %>% as.matrix
  
  p_val_mht_nap_ns = adjust_pval_step_down_fun(p_val_original, p_val_RI_mat)
  
  df.res = tibble(
    comparison = rep(c("NS vs. Cont","Nap vs. Cont", "Nap vs. NS"),rep(nrow(df.original),3)),
    outcome = rep(df.original$var_name, 3),
    p_val_original = c(df.original$p_val_ns, df.original$p_val_nap, df.original$p_val_d_nap_ns),
    p_val_adj = c(p_val_mht_ns_cont, p_val_mht_nap_cont, p_val_mht_nap_ns)
    )
  
  return(df.res)
  
}

# Correction across family-wide outcomes
df.pval.adj.family = get_adjust_pvals(df.family, df.family.RI)
# Correction across work outcomes
df.pval.adj.work = get_adjust_pvals(df.work, df.work.RI)
# Correction across well-being outcomes
df.pval.adj.wb = get_adjust_pvals(df.wb, df.wb.RI)
# Correction across cognition outcomes
df.pval.adj.cog = get_adjust_pvals(df.cog, df.cog.RI)
# Correction across family-wide outcomes
df.pval.adj.pref = get_adjust_pvals(df.pref, df.pref.RI)

df.pval.all = bind_rows(
  df.pval.adj.family,
  df.pval.adj.work,
  df.pval.adj.wb,
  df.pval.adj.cog,
  df.pval.adj.pref
  )

export(df.pval.all, "Datasets/pval_adj_table_pool.csv")





